*----------------------------------------------------------------------------------------------------------	* 
* Silencing the Rails: A Study of the Noise-Safety Trade-off in Railway Quiet Zones                         *
* RESEARCHERS:		Emtiaz Hritan																		    *
* PROGRAMMED BY:	Emtiaz Hritan 					 											            *
* CREATED:			Oct. 4, 2023																		   	*
* LAST MODIFIED:	Oct. 7, 2025														       				*
*----------------------------------------------------------------------------------------------------------	*

clear all
set more off
* Set local paths
* Set this local datapath equal to the folder location for data
	local datapath "C:\Users\name\Replication Files Silencing the Rails\Data"
* Set this local outputpath equal to the folder location for outcome like tables
	local outputpath "C:\Users\name\Replication Files Silencing the Rails\Outcome"

* Install necessary packages
ssc install reghdfe
ssc install ftools
ssc install grstyle, replace
ssc install palettes, replace
ssc install colrspace, replace
ssc install ppmlhdfe, replace 
ssc install jwdid, replace 
ssc install csdid, replace 
ssc install drdid, replace 
ssc install did_imputation, replace
ssc install frause, replace
frause mpdta.dta, clear
ssc install hdfe, replace 
ssc install event_plot, replace
ssc install addplot, replace

**************************************************************************************************
*  Table 5—: Results: Impact of railway quietzone on residential property sales value
**************************************************************************************************
cd "`datapath'"

clear all

set maxvar 120000
use sales
* nbhd is the neighbourhood code. Let's get rid of other codes for neighbourhood first. 
drop neighborhood_code
tab nbhd
* Get sales date 
drop sales_date
gen sales_date = date(sale_date,"MDY",2010)
format sales_date %td


// Calculate the median sales price per neighbourhood per year
egen yearly_sales = median (sale_price) if !missing(year), by(nbhd year)
gen ln_yearly_sales= ln(yearly_sales)

* Get control variables: Unit square Foot , number of bedrooms

// Calculate the mean unit square foot per neighbourhood per year
egen mean_sqft = mean (new_building_sqft) if !missing(year), by(nbhd year)
egen mean_bedrooms = mean (num_bedrooms) if !missing(year), by(nbhd year)
egen mean_bathroom= mean (num_full_baths) if !missing(year), by(nbhd year)
egen mean_fireplace = mean (num_fireplaces) if !missing(year), by(nbhd year)
gen age=year- new_year_built
egen mean_age = mean (age) if !missing(year), by(nbhd year)


* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen whistle_year=year(WhistleDate)
egen min_quietzone_year = min(whistle_year), by(nbhd)
gen first_treat= min_quietzone_year
replace first_treat = 0 if missing(first_treat)

duplicates report nbhd year
duplicates drop nbhd year, force 

* Data is insufficient in 1999. 
drop if year==1999

* Drop Post_covid data

drop if year >2019
* Let's balance this data using tsfill
xtset nbhd year
tsfill, full
replace ln_yearly_sales=0 if yearly_sales==.

* Let's replace first_treat
// Sort your dataset by id and year
sort nbhd year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(nbhd)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset nbhd year


*********************Column (2) & (3):  CSDID************************ 

* Using csdid package 
local controls mean_bedrooms mean_sqft mean_bathroom mean_fireplace mean_age


csdid ln_yearly_sales `controls', ivar(nbhd) time(year) gvar(first_treat)
estat all


*Not yet treated 
local controls mean_bedrooms mean_sqft mean_bathroom mean_fireplace mean_age
csdid ln_yearly_sales `controls', ivar(nbhd) time(year) gvar(first_treat) notyet
estat all



*********************Column (4) & (5): JWDID************************ 

****Then I use jwdid package to implmemnt extended two-way Mundlak (ETWM) regression of "Wooldridge, Jeffrey. 2021.  Two-Way Fixed Effects, the Two-Way Mundlak Regression, and Differences-in-Differences estimators. Working paper."
*Estimation of ATTGTs without controls using not-yet treated groups
local controls mean_bedrooms mean_sqft mean_bathroom mean_fireplace mean_age
jwdid ln_yearly_sales `controls', ivar(nbhd) tvar(year) gvar(first_treat) 
estat simple
estimates store jwdid_notyet


*Using Never treated as controls
local controls mean_bedrooms mean_sqft mean_bathroom mean_fireplace mean_age
jwdid ln_yearly_sales `controls', ivar(nbhd) tvar(year) gvar(first_treat) never 
estat simple
estimates store jwdid_never


*********************Column (6): BJS Imputation************************ 

* Using did_imputation package based on Borusyak, Kirill, Xavier Jaravel, and Jann Spiess (2023). "Revisiting Event Study Designs: Robust and Efficient Estimation," Working paper.
gen Ei= first_treat
replace Ei=. if Ei==0
did_imputation ln_yearly_sales nbhd year Ei, fe(nbhd year)
estimates store bjs



*********************Column 1: TWFE************************ 

*Regular TWFE Model 
* Generate time dummy variable- post for observations occuring after first time becoming "Quiet Zone"
gen post_time=0 
replace post_time= 1 if year > first_treat
replace post_time=0 if first_treat==0

* Create treated
gen dty = 0
replace dty =1 if first_treat!=0 & post_time==1


* Let's remove the top 1% and bottom 1% of data as outliers
drop ln_yearly_sales
* Calculate the 1st and 99th percentiles of the mean sales prices
xtile p1 = sale_price, nq(100)
sum sale_price
* Drop the bottom 1% and top 1% of sales prices
drop if p1 <= 1 | p1 >= 99

* Verify the filtered data
sum yearly_sales

gen ln_yearly_sales= ln(yearly_sales)
local controls mean_bedrooms mean_sqft mean_bathroom mean_fireplace mean_age
reghdfe ln_yearly_sales dty `controls', a(nbhd year) vce(cluster nbhd)

eststo clear



*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------


*******************************************************************************
* Figure 8. & Figure A12: Event Study Estimates of Quiet Zone Effects on Home Prices.
*******************************************************************************



clear all

set maxvar 120000
use sales
* nbhd is the neighbourhood code. Let's get rid of other codes for neighbourhood first. 
drop neighborhood_code
tab nbhd
* Get sales date 
drop sales_date
gen sales_date = date(sale_date,"MDY",2010)
format sales_date %td


// Calculate the median sales price per neighbourhood per year
egen yearly_sales = median (sale_price) if !missing(year), by(nbhd year)
gen ln_yearly_sales= ln(yearly_sales)

* Get control variables: Unit square Foot , number of bedrooms

// Calculate the mean unit square foot per neighbourhood per year
egen mean_sqft = mean (new_building_sqft) if !missing(year), by(nbhd year)
egen mean_bedrooms = mean (num_bedrooms) if !missing(year), by(nbhd year)
egen mean_bathroom= mean (num_full_baths) if !missing(year), by(nbhd year)
egen mean_fireplace = mean (num_fireplaces) if !missing(year), by(nbhd year)
gen age=year- new_year_built
egen mean_age = mean (age) if !missing(year), by(nbhd year)


* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen whistle_year=year(WhistleDate)
egen min_quietzone_year = min(whistle_year), by(nbhd)
gen first_treat= min_quietzone_year
replace first_treat = 0 if missing(first_treat)

*duplicates report nbhd year
duplicates drop nbhd year, force 

* Data is insufficient in 1999. 
drop if year==1999

* Drop Post_covid data

drop if year >2019
* Let's balance this data using tsfill
xtset nbhd year
tsfill, full
replace ln_yearly_sales=0 if yearly_sales==.

* Let's replace first_treat
// Sort your dataset by id and year
sort nbhd year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(nbhd)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset nbhd year


* Using csdid package 
local controls mean_bedrooms mean_sqft mean_bathroom mean_fireplace mean_age
csdid ln_yearly_sales `controls', ivar(nbhd) time(year) gvar(first_treat)
estat event, window(-10 10) estore(cs1)
csdid_plot

*Not yet treated 
local controls mean_bedrooms mean_sqft mean_bathroom mean_fireplace mean_age
csdid ln_yearly_sales `controls', ivar(nbhd) time(year) gvar(first_treat) notyet
estat event, window(-10 10) estore(cs2)
csdid_plot

*******************************************************************************
* Figure A12. Panel (b) : Alternative Event Study Estimates of Quiet Zone Effects on Home Prices.
*******************************************************************************

*Using Never treated as controls
local controls mean_bedrooms mean_sqft mean_bathroom mean_fireplace mean_age
jwdid ln_yearly_sales `controls', ivar(nbhd) tvar(year) gvar(first_treat) never 
estat simple
estat event, window(-10 10)
estimates store jwdid_never
jwdid_plot
graph export "res_event_jwdid.pdf", replace

*******************************************************************************
* Figure A12. Panel (a) : Alternative Event Study Estimates of Quiet Zone Effects on Home Prices.
*******************************************************************************


* Using did_imputation package based on Borusyak, Kirill, Xavier Jaravel, and Jann Spiess (2023). "Revisiting Event Study Designs: Robust and Efficient Estimation," Working paper.
gen Ei= first_treat
replace Ei=. if Ei==0
did_imputation yearly_sales nbhd year Ei, horizons(0/10) pretrend(10) minn(0)
estimates store bjs1 
event_plot, default_look graph_opt(xtitle("Years Since the Establishment") ytitle("Effect on Residential Sales Prices") xlabel(-10(5)10 10)legend(pos(6))) 
graph export "res_event_bjs.pdf", replace

*Regular TWFE Model 

* Let's remove the top 1% and bottom 1% of data as outliers
drop ln_yearly_sales
* Calculate the 1st and 99th percentiles of the mean sales prices
xtile p1 = sale_price, nq(100)
sum sale_price
* Drop the bottom 1% and top 1% of sales prices
drop if p1 <= 1 | p1 >= 99

* Verify the filtered data
sum yearly_sales

gen ln_yearly_sales= ln(yearly_sales)

* OLS Event study
g time_to_treat = year - first_treat
tab time_to_treat
 // creating dummies for the lags 0..9, based on K = number of periods since treatment (or missing if there is a never-treated group)
        forvalues l = 0/9 {
                gen L`l'event = time_to_treat==`l'
        }
        gen L10event = time_to_treat>=10 // binning K=10 and above
        
        // creating dummies for the leads 1..14
        forvalues l = 0/9 { 
                gen F`l'event = time_to_treat==-`l'
        }
        gen F10event = time_to_treat<=-10 // binning K=-14 and below
		
		local controls mean_bedrooms mean_sqft mean_bathroom mean_fireplace mean_age
        
        // running the event study regression. Drop leads 1 
        reghdfe ln_yearly_sales o.F1event F2event-F10event L*event, a(nbhd year) vce(cluster nbhd)
               
        // plotting the coeffients
        event_plot, default_look stub_lag(L#event) stub_lead(F#event) together plottype(scatter) ///
                graph_opt(xtitle("Year Since the Establishment of Quiet Zone") ytitle("OLS coefficients") xlabel(-10(5)10 10) legend(pos(6)))
        
    estimates store ols1 // saving the estimates for later


	

*******************************************************************************
* Figure 8. : Event Study Estimates of Quiet Zone Effects on Home Prices.
*******************************************************************************


event_plot cs1 ols1, ///
	stub_lag(Tp# L#event) stub_lead(Tm#  F#event ) plottype(scatter) ciplottype(rcap) ///
	together perturb(-0.325(0.13)0.325) trimlead(5) trimlag(5) noautolegend ///
	graph_opt(title("Event Study: Average Quiet Zone Effect on Residential Sales Prices", size(medlarge)) ///
		xtitle("Years Since Establishment of Quiet Zone") ytitle("Average causal effect") xlabel(-5(1)5) ylabel(-1(.5)1) ///
		legend(pos(6) order( 1 "Callaway & Sant'Anna"  3 "OLS TWFE") rows(1) region(style(none))) ///
/// the following lines replace default_look with something more elaborate
		xline(-1, lcolor(gs8) lpattern(dash)) yline(0, lcolor(gs8)) graphregion(color(white)) bgcolor(white) ylabel(, angle(horizontal)) ///
	) ///
	lag_opt1(msymbol(+) color(cranberry)) lag_ci_opt1(color(cranberry)) ///
	lag_opt2(msymbol(Oh) color(navy)) lag_ci_opt2(color(navy)) ///
	lag_opt3(msymbol(Dh) color(forest_green)) lag_ci_opt3(color(forest_green)) 	lag_opt4(msymbol(Th) color(purple)) lag_ci_opt4(color(purple))	
		
graph export "res_event_csdid_ols.pdf", replace	

eststo clear
*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------







